For the reader: You can click the black “code” box on the right hand side to either see or hide the R code behind analysis results and graphs.

The results are arranged to separate tabs by site/area, so that it’s easy to look at just the results of the site of interest, instead of scrolling accidentally down to a different site and getting confused.

The results for each site consist of four parts: pXRF, particle size distribution, loss on ignition, and colorimetry.

Sites

Ashdod-Yam

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);

library(GGally); 
library(corrplot); 
library(tidyr); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AY.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Turn any "LOD":s and missing values to "NA":s to allow scaling
values <- c("MgO", "Al2O3", "SiO2", 
            "S", "Cl", "K2O", "CaO", "Ti", "V", "Cr", 
            "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As", 
            "Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", 
            "Rh", "Pd", "Ag", "Cd", "Sn", "Sb", 
            "La", "Ce", "Hf", "Ta", "W", "Pt", "Au", 
            "Hg", "Tl", "Pb", "Bi", "Th", "U")

pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)

# Scaling the data with standard z-score method
scaled_pxrf <- as.data.frame(scale(pxrf_NA))

# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(scaled_pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"

# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)

# Removing all columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)

# Removing all columns that contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)

pxrf_no_na:

summary(pxrf_no_na)
##                Area             Type        Al2O3               SiO2           
##  Area D Acropolis:19   mud mortar : 1   Min.   :-1.20092   Min.   :-1.3142038  
##  Area D Wall     :22   mud plaster: 5   1st Qu.:-0.49712   1st Qu.:-0.4978527  
##  Builder's floor : 5   mudbrick   :35   Median :-0.23740   Median :-0.0003512  
##  Hellenistic     : 1   PEM        : 6   Mean   : 0.04576   Mean   : 0.0389010  
##                                         3rd Qu.: 0.54615   3rd Qu.: 0.5568590  
##                                         Max.   : 1.88920   Max.   : 1.3770734  
##       K2O                CaO                 Ti                 Mn          
##  Min.   :-1.08731   Min.   :-1.56460   Min.   :-1.17385   Min.   :-1.00032  
##  1st Qu.:-0.54550   1st Qu.:-0.45677   1st Qu.:-0.47636   1st Qu.:-0.50265  
##  Median :-0.02978   Median : 0.07788   Median :-0.04881   Median :-0.13876  
##  Mean   : 0.04621   Mean   : 0.06819   Mean   : 0.04264   Mean   : 0.03671  
##  3rd Qu.: 0.39368   3rd Qu.: 0.64823   3rd Qu.: 0.53925   3rd Qu.: 0.44988  
##  Max.   : 2.35094   Max.   : 1.91455   Max.   : 2.05476   Max.   : 1.93487  
##        Fe                 Cu                 Zn                 Rb          
##  Min.   :-1.10643   Min.   :-1.50957   Min.   :-1.03131   Min.   :-1.28223  
##  1st Qu.:-0.49989   1st Qu.:-0.49095   1st Qu.:-0.61933   1st Qu.:-0.54198  
##  Median :-0.09144   Median : 0.18813   Median :-0.20735   Median :-0.13073  
##  Mean   : 0.04543   Mean   : 0.04846   Mean   : 0.02864   Mean   : 0.04121  
##  3rd Qu.: 0.46449   3rd Qu.: 0.64086   3rd Qu.: 0.29970   3rd Qu.: 0.68149  
##  Max.   : 2.31412   Max.   : 1.88584   Max.   : 2.45465   Max.   : 1.92553  
##        Sr                 Zr          
##  Min.   :-1.85766   Min.   :-1.61460  
##  1st Qu.:-0.41969   1st Qu.:-0.60738  
##  Median : 0.10393   Median : 0.05199  
##  Mean   : 0.07807   Mean   : 0.05398  
##  3rd Qu.: 0.51031   3rd Qu.: 0.55041  
##  Max.   : 2.08896   Max.   : 2.11835

pxrf_all:

summary(pxrf_all)
##                Area             Type         MgO               Al2O3         
##  Area D Acropolis:19   mud mortar : 1   Min.   :-1.23821   Min.   :-1.20092  
##  Area D Wall     :22   mud plaster: 5   1st Qu.:-0.43843   1st Qu.:-0.49712  
##  Builder's floor : 5   mudbrick   :35   Median : 0.02957   Median :-0.23740  
##  Hellenistic     : 1   PEM        : 6   Mean   : 0.03914   Mean   : 0.04576  
##                                         3rd Qu.: 0.42944   3rd Qu.: 0.54615  
##                                         Max.   : 1.75657   Max.   : 1.88920  
##                                         NA's   :1                            
##       SiO2                  S                 Cl                K2O          
##  Min.   :-1.3142038   Min.   :-0.2340   Min.   :-0.88004   Min.   :-1.08731  
##  1st Qu.:-0.4978527   1st Qu.:-0.2328   1st Qu.:-0.51354   1st Qu.:-0.54550  
##  Median :-0.0003512   Median :-0.2107   Median :-0.10742   Median :-0.02978  
##  Mean   : 0.0389010   Mean   :-0.2000   Mean   : 0.01773   Mean   : 0.04621  
##  3rd Qu.: 0.5568590   3rd Qu.:-0.1779   3rd Qu.: 0.17488   3rd Qu.: 0.39368  
##  Max.   : 1.3770734   Max.   :-0.1447   Max.   : 3.94139   Max.   : 2.35094  
##                       NA's   :43        NA's   :1                            
##       CaO                 Ti                 V                  Mn          
##  Min.   :-1.56460   Min.   :-1.17385   Min.   :-0.98017   Min.   :-1.00032  
##  1st Qu.:-0.45677   1st Qu.:-0.47636   1st Qu.:-0.26027   1st Qu.:-0.50265  
##  Median : 0.07788   Median :-0.04881   Median : 0.08755   Median :-0.13876  
##  Mean   : 0.06819   Mean   : 0.04264   Mean   : 0.08074   Mean   : 0.03671  
##  3rd Qu.: 0.64823   3rd Qu.: 0.53925   3rd Qu.: 0.42728   3rd Qu.: 0.44988  
##  Max.   : 1.91455   Max.   : 2.05476   Max.   : 1.57588   Max.   : 1.93487  
##                                        NA's   :28                           
##        Fe                 Ni                 Cu                 Zn          
##  Min.   :-1.10643   Min.   :-1.10296   Min.   :-1.50957   Min.   :-1.03131  
##  1st Qu.:-0.49989   1st Qu.:-0.44318   1st Qu.:-0.49095   1st Qu.:-0.61933  
##  Median :-0.09144   Median :-0.05642   Median : 0.18813   Median :-0.20735  
##  Mean   : 0.04543   Mean   : 0.01430   Mean   : 0.04846   Mean   : 0.02864  
##  3rd Qu.: 0.46449   3rd Qu.: 0.53510   3rd Qu.: 0.64086   3rd Qu.: 0.29970  
##  Max.   : 2.31412   Max.   : 1.71814   Max.   : 1.88584   Max.   : 2.45465  
##                     NA's   :1                                               
##        As                Rb                 Sr                 Y           
##  Min.   :-0.8953   Min.   :-1.28223   Min.   :-1.85766   Min.   :-1.16797  
##  1st Qu.:-0.2961   1st Qu.:-0.54198   1st Qu.:-0.41969   1st Qu.:-0.41036  
##  Median : 0.2033   Median :-0.13073   Median : 0.10393   Median :-0.05384  
##  Mean   : 0.2832   Mean   : 0.04121   Mean   : 0.07807   Mean   : 0.04866  
##  3rd Qu.: 0.6028   3rd Qu.: 0.68149   3rd Qu.: 0.51031   3rd Qu.: 0.39182  
##  Max.   : 2.3007   Max.   : 1.92553   Max.   : 2.08896   Max.   : 2.26357  
##  NA's   :27                                              NA's   :2         
##        Zr                 Nb          
##  Min.   :-1.61460   Min.   :-0.89312  
##  1st Qu.:-0.60738   1st Qu.:-0.48298  
##  Median : 0.05199   Median :-0.07283  
##  Mean   : 0.05398   Mean   : 0.06816  
##  3rd Qu.: 0.55041   3rd Qu.: 0.43985  
##  Max.   : 2.11835   Max.   : 2.18298  
##                     NA's   :31

pXRF: correlations

# Drop the more dubious elements
pxrf_1 <- pxrf_no_na %>% select(-Al2O3)
pxrf_1 <- pxrf_1 %>% select(-CaO)

# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(pxrf_1[3:12], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')

# K-means clustering in ggpairs: 2 clusters
km <-kmeans(pxrf_1[3:12], centers = 2)
cluster_x <- as.factor(km$cluster)
ggpairs(pxrf_1, mapping = aes(col = cluster_x))

pXRF: PCA

PCA with only the no-NA:s elements:

# Elements with no NA values at all
colnames(pxrf_no_na)
##  [1] "Area"  "Type"  "Al2O3" "SiO2"  "K2O"   "CaO"   "Ti"    "Mn"    "Fe"   
## [10] "Cu"    "Zn"    "Rb"    "Sr"    "Zr"
# PCA
pca_1 <- prcomp(pxrf_1[3:12])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     1.9037 1.0026 0.8270 0.67213 0.37446 0.35729 0.29419
## Proportion of Variance 0.5809 0.1611 0.1096 0.07242 0.02248 0.02046 0.01387
## Cumulative Proportion  0.5809 0.7421 0.8517 0.92413 0.94661 0.96707 0.98095
##                            PC8     PC9    PC10
## Standard deviation     0.26346 0.16502 0.14906
## Proportion of Variance 0.01113 0.00437 0.00356
## Cumulative Proportion  0.99207 0.99644 1.00000
# PCA1 plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam elements")

autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by area")

autoplot(pca_1, data=pxrf_1, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by sample type")

PCA with all the feasible elements:

# Elements that have at least one viable value
colnames(pxrf_all)
##  [1] "Area"  "Type"  "MgO"   "Al2O3" "SiO2"  "S"     "Cl"    "K2O"   "CaO"  
## [10] "Ti"    "V"     "Mn"    "Fe"    "Ni"    "Cu"    "Zn"    "As"    "Rb"   
## [19] "Sr"    "Y"     "Zr"    "Nb"
# PCA with NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0

pca_2 <- prcomp(pxrf_all[3:22])
summary(pca_2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.1392 1.2623 1.1731 0.77694 0.75938 0.56778 0.50254
## Proportion of Variance 0.4455 0.1551 0.1340 0.05877 0.05614 0.03138 0.02459
## Cumulative Proportion  0.4455 0.6006 0.7346 0.79337 0.84951 0.88090 0.90548
##                            PC8     PC9   PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.44075 0.42941 0.3738 0.33174 0.29859 0.26048 0.24174
## Proportion of Variance 0.01891 0.01795 0.0136 0.01071 0.00868 0.00661 0.00569
## Cumulative Proportion  0.92439 0.94235 0.9559 0.96666 0.97534 0.98195 0.98764
##                           PC15    PC16    PC17    PC18    PC19    PC20
## Standard deviation     0.22407 0.16224 0.16055 0.12292 0.08709 0.04448
## Proportion of Variance 0.00489 0.00256 0.00251 0.00147 0.00074 0.00019
## Cumulative Proportion  0.99253 0.99509 0.99760 0.99907 0.99981 1.00000
# PCA2 plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam more elements")

autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam more elements grouped by area")

autoplot(pca_1, data=pxrf_1, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam more elements grouped by sample type")

pXRF: HCA

# HCA

# compute divisive hierarchical clustering
hc4 <- diana(pxrf_1[3:12])

# Divise coefficient; amount of clustering structure found
hc4$dc
## [1] 0.8007861
# Diana dendrogram
pltree(hc4, cex = 1, hang = -1, main = "Dendrogram of diana")

# HCA dendrogam 1
dend <- 
    pxrf_1[3:12] %>%                    # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=5) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=5) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2: 
HCA.ward5 <- hclust(dist(pxrf_1[3:12], method="euclid"), method="ward.D2")
plot(HCA.ward5, main="Ward's method")
rect.hclust(HCA.ward5, k=5)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=2)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_AY.xlsx", sep=";")
tga <- read.xlsx("data/tga_AY.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:52          Min.   :8.079   Min.   :10.13   Min.   :1.538  
##  Class :character   1st Qu.:8.566   1st Qu.:11.00   1st Qu.:2.464  
##  Mode  :character   Median :9.006   Median :11.71   Median :2.635  
##                     Mean   :8.994   Mean   :11.55   Mean   :2.559  
##                     3rd Qu.:9.435   3rd Qu.:12.05   3rd Qu.:2.824  
##                     Max.   :9.925   Max.   :12.68   Max.   :3.043  
##    dry_weight     after_550_C     after_950_C      context         
##  Min.   :10.11   Min.   :10.08   Min.   :10.05   Length:52         
##  1st Qu.:10.97   1st Qu.:10.94   1st Qu.:10.86   Class :character  
##  Median :11.67   Median :11.62   Median :11.53   Mode  :character  
##  Mean   :11.52   Mean   :11.47   Mean   :11.41                     
##  3rd Qu.:12.00   3rd Qu.:11.96   3rd Qu.:11.93                     
##  Max.   :12.65   Max.   :12.60   Max.   :12.52                     
##      type          
##  Length:52         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:53          Length:53          Length:53          Length:53         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:53          Length:53          Length:53          Length:53         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:53         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1] 0.398150 1.528018 2.198513 3.162314 4.889189
## 
## $n
## [1] 52
## 
## $conf
## [1] 1.840428 2.556598
## 
## $out
## [1]  8.041755 11.960444
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
##  [1] "AY-1"    "AY-2"    "AY-3"    "AY-4"    "AY-5"    "AY-6"    "AY-7"   
##  [8] "AY-8"    "AY-9"    "AY-10"   "AY-11"   "AY-12"   "AY-13"   "AY-14"  
## [15] "AY-15"   "AY-16"   "AY-17"   "AY-18"   "AY-19"   "AY-20"   "AY-21"  
## [22] "AY-22"   "AY-23"   "AY-24"   "AY-25"   "AY-26"   "AY-27"   "AY-28"  
## [29] "AY-29"   "AY-30"   "AY-31"   "AY-32"   "AY-33"   "AY-34"   "AY-35"  
## [36] "AY-36"   "AY-37"   "AY-38"   "AY-39"   "AY-40"   "AY-41"   "AY-42"  
## [43] "AY-50"   "AY-51"   "AY-52"   "AY-53"   "AY-54"   "AY-50_2" "AY-51_2"
## [50] "AY-52_2" "AY-53_2" "AY-54_2"
loi <- subset(loi[1:47, ])

ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(type), colour = factor(context))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(loi$c550)

barplot(loi$c950)

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1] 0.7233976 1.5855926 2.3958197 3.4621816 5.4590326
## 
## $n
## [1] 50
## 
## $conf
## [1] 1.976504 2.815135
## 
## $out
## [1] 7.994718
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AY.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :37.56   Min.   : 7.97   Min.   :16.81  
##  1st Qu.:42.13   1st Qu.: 9.72   1st Qu.:19.76  
##  Median :43.64   Median :10.38   Median :21.18  
##  Mean   :43.92   Mean   :10.49   Mean   :21.22  
##  3rd Qu.:45.91   3rd Qu.:11.29   3rd Qu.:22.34  
##  Max.   :48.24   Max.   :13.53   Max.   :25.03
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Ashdod-Yam: Byzantine

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);

library(GGally); 
library(corrplot); 
library(tidyr); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AYB.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Turn any "LOD":s and missing values to "NA":s to allow scaling
# Removing "Na", S", "Cl", "P" and "Ba" already
values <- c("MgO", "Al2O3", "SiO2", 
            "K2O", "CaO", "Ti", "V", "Cr", 
            "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As", 
            "Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", 
            "Rh", "Pd", "Ag", "Cd", "Sn", "Sb", 
            "La", "Ce", "Hf", "Ta", "W", "Pt", "Au", 
            "Hg", "Tl", "Pb", "Bi", "Th", "U")

pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)

# Scaling the data with standard z-score method
scaled_pxrf <- as.data.frame(scale(pxrf_NA))

# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(scaled_pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"

# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)

# Removing columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)

# Removing columns if they contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)

pxrf_no_na:

summary(pxrf_no_na)
##         Area        Type       Al2O3                 SiO2        
##  Byzantine:7   ceiling:3   Min.   :-0.6649899   Min.   :-0.7149  
##                floor  :1   1st Qu.:-0.5260944   1st Qu.:-0.5619  
##                PEM    :3   Median :-0.3729803   Median :-0.3191  
##                            Mean   : 0.0000000   Mean   : 0.0000  
##                            3rd Qu.:-0.0009942   3rd Qu.: 0.1869  
##                            Max.   : 2.0921476   Max.   : 1.7841  
##       CaO                Ti                   Mn                Fe         
##  Min.   :-1.6463   Min.   :-0.6002745   Min.   :-0.5364   Min.   :-0.9003  
##  1st Qu.:-0.5655   1st Qu.:-0.4813714   1st Qu.:-0.4011   1st Qu.:-0.6185  
##  Median : 0.3098   Median :-0.3873934   Median :-0.3370   Median :-0.4877  
##  Mean   : 0.0000   Mean   : 0.0000000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.6541   3rd Qu.: 0.0005995   3rd Qu.:-0.1593   3rd Qu.: 0.5536  
##  Max.   : 1.1593   Max.   : 1.9492115   Max.   : 1.9942   Max.   : 1.5177  
##        Ni                Cu                Zn                Sr         
##  Min.   :-0.9780   Min.   :-1.3184   Min.   :-0.7633   Min.   :-0.7906  
##  1st Qu.:-0.4890   1st Qu.:-0.2432   1st Qu.:-0.5817   1st Qu.:-0.3883  
##  Median :-0.2574   Median :-0.1984   Median :-0.1408   Median :-0.2674  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.3089   3rd Qu.: 0.1600   3rd Qu.: 0.3261   3rd Qu.: 0.4585  
##  Max.   : 1.5957   Max.   : 1.6832   Max.   : 1.4154   Max.   : 0.9176  
##        Zr          
##  Min.   :-0.69215  
##  1st Qu.:-0.54451  
##  Median :-0.24621  
##  Mean   : 0.00000  
##  3rd Qu.: 0.02497  
##  Max.   : 1.97745

pxrf_all:

summary(pxrf_all)
##         Area        Type        MgO              Al2O3           
##  Byzantine:7   ceiling:3   Min.   :-0.9836   Min.   :-0.6649899  
##                floor  :1   1st Qu.:-0.4419   1st Qu.:-0.5260944  
##                PEM    :3   Median :-0.2397   Median :-0.3729803  
##                            Mean   :-0.1222   Mean   : 0.0000000  
##                            3rd Qu.: 0.2820   3rd Qu.:-0.0009942  
##                            Max.   : 0.7831   Max.   : 2.0921476  
##                            NA's   :1                             
##       SiO2              K2O                CaO                Ti            
##  Min.   :-0.7149   Min.   :-1.03235   Min.   :-1.6463   Min.   :-0.6002745  
##  1st Qu.:-0.5619   1st Qu.:-0.54671   1st Qu.:-0.5655   1st Qu.:-0.4813714  
##  Median :-0.3191   Median :-0.25114   Median : 0.3098   Median :-0.3873934  
##  Mean   : 0.0000   Mean   : 0.06213   Mean   : 0.0000   Mean   : 0.0000000  
##  3rd Qu.: 0.1869   3rd Qu.: 0.29801   3rd Qu.: 0.6541   3rd Qu.: 0.0005995  
##  Max.   : 1.7841   Max.   : 1.84282   Max.   : 1.1593   Max.   : 1.9492115  
##                    NA's   :2                                                
##        Mn                Fe                Ni                Cu         
##  Min.   :-0.5364   Min.   :-0.9003   Min.   :-0.9780   Min.   :-1.3184  
##  1st Qu.:-0.4011   1st Qu.:-0.6185   1st Qu.:-0.4890   1st Qu.:-0.2432  
##  Median :-0.3370   Median :-0.4877   Median :-0.2574   Median :-0.1984  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.:-0.1593   3rd Qu.: 0.5536   3rd Qu.: 0.3089   3rd Qu.: 0.1600  
##  Max.   : 1.9942   Max.   : 1.5177   Max.   : 1.5957   Max.   : 1.6832  
##                                                                         
##        Zn                As                 Rb                 Sr         
##  Min.   :-0.7633   Min.   :-0.88791   Min.   :-0.83065   Min.   :-0.7906  
##  1st Qu.:-0.5817   1st Qu.:-0.56643   1st Qu.:-0.55508   1st Qu.:-0.3883  
##  Median :-0.1408   Median :-0.03062   Median :-0.12204   Median :-0.2674  
##  Mean   : 0.0000   Mean   : 0.09185   Mean   :-0.08267   Mean   : 0.0000  
##  3rd Qu.: 0.3261   3rd Qu.: 0.82668   3rd Qu.: 0.25195   3rd Qu.: 0.4585  
##  Max.   : 1.4154   Max.   : 1.13285   Max.   : 0.90151   Max.   : 0.9176  
##                    NA's   :1          NA's   :1                           
##        Y                  Zr                 Nb                Ag   
##  Min.   :-0.82699   Min.   :-0.69215   Min.   :-0.2048   Min.   :0  
##  1st Qu.:-0.64944   1st Qu.:-0.54451   1st Qu.:-0.2048   1st Qu.:0  
##  Median :-0.29435   Median :-0.24621   Median :-0.2048   Median :0  
##  Mean   : 0.06074   Mean   : 0.00000   Mean   :-0.2048   Mean   :0  
##  3rd Qu.: 0.59337   3rd Qu.: 0.02497   3rd Qu.:-0.2048   3rd Qu.:0  
##  Max.   : 1.48110   Max.   : 1.97745   Max.   :-0.2048   Max.   :0  
##  NA's   :2                             NA's   :6         NA's   :6  
##        Pb          
##  Min.   :-0.00135  
##  1st Qu.: 0.29028  
##  Median : 0.39289  
##  Mean   : 0.40100  
##  3rd Qu.: 0.50361  
##  Max.   : 0.81954  
##  NA's   :3

pXRF: K means cluster analysis

# Drop the more dubious elements
pxrf_1 <- pxrf_no_na %>% select(-Al2O3)
pxrf_1 <- pxrf_1 %>% select(-CaO)

# Optimal number of clusters in K-means analysis: 2
k_max <- 6
twcss <- sapply(1:k_max, function(k){kmeans(pxrf_1[3:11], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')

pXRF: PCA

PCA with only the no-NA:s elements:

# PCA
pca_1 <- prcomp(pxrf_1[3:11])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6       PC7
## Standard deviation     2.2445 0.8845 0.75353 0.40411 0.30308 0.06678 1.703e-16
## Proportion of Variance 0.7578 0.1177 0.08542 0.02457 0.01382 0.00067 0.000e+00
## Cumulative Proportion  0.7578 0.8755 0.96094 0.98551 0.99933 1.00000 1.000e+00
# PCA1 plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA limited elements")

autoplot(pca_1, data=pxrf_1, colour='Type', shape = FALSE, label = TRUE,  main = "PCA grouped by area")

PCA with all the feasible elements:

# PCA with all elements, NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0

pca_2 <- prcomp(pxrf_all[3:21])
summary(pca_2)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5    PC6       PC7
## Standard deviation     2.6967 1.4173 1.03547 0.69371 0.65343 0.3604 2.873e-16
## Proportion of Variance 0.6384 0.1763 0.09413 0.04225 0.03748 0.0114 0.000e+00
## Cumulative Proportion  0.6384 0.8147 0.90887 0.95111 0.98860 1.0000 1.000e+00
# PCA2 plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA more elements")

autoplot(pca_2, data=pxrf_all, colour='Type', shape = FALSE, label = TRUE,  main = "PCA more elements grouped by area")

pXRF: HCA

# HCA

# HCA dendrogam 1
dend <- 
    pxrf_1[3:11] %>%                    # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=4) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=4) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2: 
HCA.ward4 <- hclust(dist(pxrf_1[3:11], method="euclid"), method="ward.D2")
plot(HCA.ward4, main="HCA with clusters")
rect.hclust(HCA.ward4, k=4)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AYB.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_AYB.xlsx", sep=";")
tga <- read.xlsx("data/tga_AYB.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:7           Min.   :8.382   Min.   :10.92   Min.   :2.257  
##  Class :character   1st Qu.:8.555   1st Qu.:11.39   1st Qu.:2.613  
##  Mode  :character   Median :8.666   Median :11.50   Median :2.880  
##                     Mean   :8.943   Mean   :11.68   Mean   :2.739  
##                     3rd Qu.:9.442   3rd Qu.:12.13   3rd Qu.:2.905  
##                     Max.   :9.560   Max.   :12.31   Max.   :3.002  
##    dry_weight     after_550_C     after_950_C      context         
##  Min.   :10.83   Min.   :10.62   Min.   :10.50   Length:7          
##  1st Qu.:11.33   1st Qu.:11.25   1st Qu.:11.04   Class :character  
##  Median :11.45   Median :11.39   Median :11.21   Mode  :character  
##  Mean   :11.63   Mean   :11.55   Mean   :11.24                     
##  3rd Qu.:12.11   3rd Qu.:12.05   3rd Qu.:11.43                     
##  Max.   :12.26   Max.   :12.24   Max.   :12.06                     
##      type          
##  Length:7          
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date      Batch               Wet           
##  Length:7           Min.   :44536   Length:7           Length:7          
##  Class :character   1st Qu.:44536   Class :character   Class :character  
##  Mode  :character   Median :44536   Mode  :character   Mode  :character  
##                     Mean   :44536                                        
##                     3rd Qu.:44536                                        
##                     Max.   :44536                                        
##      Dry              Mass_550           Mass_950           context         
##  Length:7           Length:7           Length:7           Length:7          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      type          
##  Length:7          
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1]  1.087679  5.239954  6.463715 14.575647 15.544905
## 
## $n
## [1] 7
## 
## $conf
## [1]  0.8885902 12.0388404
## 
## $out
## [1] 32.40429
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
## [1] "AY-43" "AY-44" "AY-45" "AY-46" "AY-47" "AY-48" "AY-49"
loi <- subset(loi[1:47, ])

### ggplots later

barplot(loi$c550)

barplot(loi$c950)

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Total distribution of LOI in 550 C (1) and 950 C (2)
boxplot(loi$c550, loi$c950)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1]  6.813924  8.278065  9.781651 11.887838 12.043832
## 
## $n
## [1] 7
## 
## $conf
## [1]  7.625953 11.937349
## 
## $out
## [1] 29.72412
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AYB.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :34.56   Min.   : 2.480   Min.   : 6.98  
##  1st Qu.:38.93   1st Qu.: 5.215   1st Qu.:12.73  
##  Median :45.30   Median : 5.540   Median :13.28  
##  Mean   :46.69   Mean   : 6.211   Mean   :14.56  
##  3rd Qu.:50.95   3rd Qu.: 6.675   3rd Qu.:16.21  
##  Max.   :67.20   Max.   :11.680   Max.   :23.75
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Israel

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);

library(GGally); 
library(corrplot); 
library(tidyr); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters

# Read the raw data
pxrf <- read.xlsx("data/pxrf_israel.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Turn any "LOD":s and missing values to "NA":s to allow scaling
# Removing "Na", S", "Cl", "P" and "Ba" already
values <- c("MgO", "Al2O3", "SiO2", 
            "K2O", "CaO", "Ti", "V", "Cr", 
            "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As", 
            "Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", 
            "Rh", "Pd", "Ag", "Cd", "Sn", "Sb", 
            "La", "Ce", "Hf", "Ta", "W", "Pt", "Au", 
            "Hg", "Tl", "Pb", "Bi", "Th", "U")

pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)

# Scaling the data with standard z-score method
scaled_pxrf <- as.data.frame(scale(pxrf_NA))

# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(scaled_pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"

# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)

# Removing columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)

# Removing columns if they contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)

pxrf_no_na:

summary(pxrf_no_na)
##              Area              Type         MgO              Al2O3        
##  Ad-Halom      :9   floor        : 2   Min.   :-1.2243   Min.   :-1.1535  
##  Rishon Le'Zion:1   floor plaster: 1   1st Qu.:-0.3655   1st Qu.:-0.7296  
##  Tell Iztabba  :9   lime plaster : 4   Median : 0.3253   Median :-0.1502  
##                     mudbrick     :12   Mean   : 0.2570   Mean   :-0.1498  
##                                        3rd Qu.: 0.7669   3rd Qu.: 0.3186  
##                                        Max.   : 2.2752   Max.   : 1.0340  
##       CaO                Ti                 Mn                Fe          
##  Min.   :-1.1324   Min.   :-1.62849   Min.   :-1.1564   Min.   :-1.21031  
##  1st Qu.:-0.3895   1st Qu.:-0.59656   1st Qu.:-0.7510   1st Qu.:-0.75649  
##  Median : 0.2485   Median :-0.02908   Median :-0.1021   Median : 0.09126  
##  Mean   : 0.1365   Mean   :-0.17581   Mean   :-0.1364   Mean   :-0.06331  
##  3rd Qu.: 0.6235   3rd Qu.: 0.38876   3rd Qu.: 0.2178   3rd Qu.: 0.47399  
##  Max.   : 2.0327   Max.   : 1.15358   Max.   : 1.4046   Max.   : 1.24389  
##        Cu                 Zn                 Sr                Zr         
##  Min.   :-1.23132   Min.   :-1.47722   Min.   :-1.1311   Min.   :-1.9036  
##  1st Qu.:-0.40202   1st Qu.:-0.56202   1st Qu.:-0.5905   1st Qu.:-1.2329  
##  Median : 0.06850   Median :-0.11447   Median :-0.2214   Median :-0.1911  
##  Mean   : 0.03885   Mean   : 0.07566   Mean   :-0.1056   Mean   :-0.3629  
##  3rd Qu.: 0.38611   3rd Qu.: 0.91136   3rd Qu.: 0.3090   3rd Qu.: 0.4183  
##  Max.   : 1.19776   Max.   : 1.39913   Max.   : 1.7253   Max.   : 1.4929

pxrf_all:

summary(pxrf_all)
##              Area              Type         MgO              Al2O3        
##  Ad-Halom      :9   floor        : 2   Min.   :-1.2243   Min.   :-1.1535  
##  Rishon Le'Zion:1   floor plaster: 1   1st Qu.:-0.3655   1st Qu.:-0.7296  
##  Tell Iztabba  :9   lime plaster : 4   Median : 0.3253   Median :-0.1502  
##                     mudbrick     :12   Mean   : 0.2570   Mean   :-0.1498  
##                                        3rd Qu.: 0.7669   3rd Qu.: 0.3186  
##                                        Max.   : 2.2752   Max.   : 1.0340  
##                                                                           
##       SiO2               K2O                 CaO                Ti          
##  Min.   :-1.63885   Min.   :-0.379762   Min.   :-1.1324   Min.   :-1.62849  
##  1st Qu.:-0.60735   1st Qu.: 0.007179   1st Qu.:-0.3895   1st Qu.:-0.59656  
##  Median :-0.05503   Median : 0.329753   Median : 0.2485   Median :-0.02908  
##  Mean   :-0.11085   Mean   : 0.326436   Mean   : 0.1365   Mean   :-0.17581  
##  3rd Qu.: 0.33797   3rd Qu.: 0.477998   3rd Qu.: 0.6235   3rd Qu.: 0.38876  
##  Max.   : 1.39686   Max.   : 1.190872   Max.   : 2.0327   Max.   : 1.15358  
##  NA's   :1          NA's   :8                                               
##        Mn                Fe                 Ni                 Cu          
##  Min.   :-1.1564   Min.   :-1.21031   Min.   :-0.85215   Min.   :-1.23132  
##  1st Qu.:-0.7510   1st Qu.:-0.75649   1st Qu.:-0.40208   1st Qu.:-0.40202  
##  Median :-0.1021   Median : 0.09126   Median :-0.18228   Median : 0.06850  
##  Mean   :-0.1364   Mean   :-0.06331   Mean   : 0.01659   Mean   : 0.03885  
##  3rd Qu.: 0.2178   3rd Qu.: 0.47399   3rd Qu.: 0.26255   3rd Qu.: 0.38611  
##  Max.   : 1.4046   Max.   : 1.24389   Max.   : 1.17838   Max.   : 1.19776  
##                                       NA's   :11                           
##        Zn                 Rb                Sr                Y           
##  Min.   :-1.47722   Min.   :-0.9609   Min.   :-1.1311   Min.   :-0.85071  
##  1st Qu.:-0.56202   1st Qu.:-0.3269   1st Qu.:-0.5905   1st Qu.:-0.47216  
##  Median :-0.11447   Median : 0.3071   Median :-0.2214   Median :-0.09361  
##  Mean   : 0.07566   Mean   : 0.2332   Mean   :-0.1056   Mean   :-0.30009  
##  3rd Qu.: 0.91136   3rd Qu.: 0.6637   3rd Qu.: 0.3090   3rd Qu.:-0.02478  
##  Max.   : 1.39913   Max.   : 1.4364   Max.   : 1.7253   Max.   : 0.04405  
##                     NA's   :6                           NA's   :16        
##        Zr                Nb                Ag         
##  Min.   :-1.9036   Min.   :-0.1096   Min.   :0.07923  
##  1st Qu.:-1.2329   1st Qu.: 0.2421   1st Qu.:0.07923  
##  Median :-0.1911   Median : 0.5937   Median :0.07923  
##  Mean   :-0.3629   Mean   : 0.5937   Mean   :0.07923  
##  3rd Qu.: 0.4183   3rd Qu.: 0.9454   3rd Qu.:0.07923  
##  Max.   : 1.4929   Max.   : 1.2971   Max.   :0.07923  
##                    NA's   :17        NA's   :18

pXRF: K means cluster analysis

# Drop the more dubious elements
pxrf_1 <- pxrf_no_na %>% select(-Al2O3)
pxrf_1 <- pxrf_1 %>% select(-CaO)

# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(pxrf_1[3:10], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')

# K-means clustering in ggpairs: 2 clusters
km <-kmeans(pxrf_1[3:10], centers = 4)
cluster_x <- as.factor(km$cluster)
ggpairs(pxrf_1, mapping = aes(col = cluster_x))

pXRF: PCA

PCA with only the no-NA:s elements:

# PCA
pca_1 <- prcomp(pxrf_1[3:10])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5    PC6     PC7
## Standard deviation     1.7341 1.1365 0.8140 0.51966 0.42169 0.2369 0.15638
## Proportion of Variance 0.5463 0.2346 0.1204 0.04906 0.03231 0.0102 0.00444
## Cumulative Proportion  0.5463 0.7809 0.9013 0.95038 0.98268 0.9929 0.99732
##                            PC8
## Standard deviation     0.12142
## Proportion of Variance 0.00268
## Cumulative Proportion  1.00000
# PCA1 plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA limited elements")

autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE,  main = "PCA grouped by area")

autoplot(pca_1, data=pxrf_1, colour='Type', shape = FALSE, label = TRUE,  main = "PCA grouped by type")

PCA with all the feasible elements:

# PCA with all elements, NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0

pca_2 <- prcomp(pxrf_all[3:19])
summary(pca_2)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.0585 1.2500 0.8709 0.73570 0.63683 0.41024 0.36958
## Proportion of Variance 0.5252 0.1937 0.0940 0.06709 0.05027 0.02086 0.01693
## Cumulative Proportion  0.5252 0.7189 0.8129 0.88000 0.93027 0.95113 0.96806
##                            PC8     PC9   PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.26815 0.23622 0.2126 0.18137 0.17097 0.09826 0.07320
## Proportion of Variance 0.00891 0.00692 0.0056 0.00408 0.00362 0.00120 0.00066
## Cumulative Proportion  0.97698 0.98389 0.9895 0.99357 0.99720 0.99839 0.99906
##                           PC15    PC16      PC17
## Standard deviation     0.07015 0.05176 0.0002973
## Proportion of Variance 0.00061 0.00033 0.0000000
## Cumulative Proportion  0.99967 1.00000 1.0000000
# PCA2 plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA more elements")

autoplot(pca_2, data=pxrf_all, colour='Area', shape = FALSE, label = TRUE,  main = "PCA more elements grouped by area")

autoplot(pca_2, data=pxrf_all, colour='Type', shape = FALSE, label = TRUE,  main = "PCA more elements grouped by type")

pXRF: HCA

# HCA dendrogam 1
dend <- 
    pxrf_1[3:10] %>%                    # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=4) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=6) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2: 
HCA.ward4 <- hclust(dist(pxrf_1[3:10], method="euclid"), method="ward.D2")
plot(HCA.ward4, main="HCA with clusters")
rect.hclust(HCA.ward4, k=6)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_israel.xlsx", sep=";")
tga <- read.xlsx("data/tga_israel.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:19          Min.   :7.921   Min.   :10.55   Min.   :2.081  
##  Class :character   1st Qu.:8.557   1st Qu.:11.23   1st Qu.:2.403  
##  Mode  :character   Median :9.278   Median :11.78   Median :2.584  
##                     Mean   :9.017   Mean   :11.63   Mean   :2.611  
##                     3rd Qu.:9.452   3rd Qu.:11.92   3rd Qu.:2.828  
##                     Max.   :9.787   Max.   :12.62   Max.   :3.231  
##    dry_weight     after_550_C     after_950_C       context         
##  Min.   :10.52   Min.   :10.42   Min.   : 9.773   Length:19         
##  1st Qu.:11.23   1st Qu.:11.18   1st Qu.:10.376   Class :character  
##  Median :11.76   Median :11.70   Median :10.971   Mode  :character  
##  Mean   :11.60   Mean   :11.53   Mean   :10.884                     
##  3rd Qu.:11.88   3rd Qu.:11.78   3rd Qu.:11.322                     
##  Max.   :12.56   Max.   :12.49   Max.   :12.371                     
##      type          
##  Length:19         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:12         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1]  3.72729 16.97425 29.14914 36.98187 42.60863
## 
## $n
## [1] 19
## 
## $conf
## [1] 21.89685 36.40144
## 
## $out
## numeric(0)
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
##  [1] "AH-1"  "AH-2"  "AH-3"  "AH-4"  "AH-5"  "AH-6"  "AH-7"  "AH-8"  "AH-9" 
## [10] "TI-1"  "TI-2"  "TI-3"  "TI-4"  "TI-5"  "TI-6"  "TI-7"  "TI-8"  "TI-10"
## [19] "RLZ-1"
loi <- subset(loi[1:72, ])

ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

ggplot(loi, 
      aes(c550, c950)) +
      geom_point(size=2, aes(shape = factor(type), colour = factor(context))) +
      labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

### add ggplots later with context and type

barplot(loi$c550)

barplot(loi$c950)

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1]  4.169125 16.340505 30.528569 41.702750 42.691722
## 
## $n
## [1] 12
## 
## $conf
## [1] 18.96068 42.09646
## 
## $out
## numeric(0)
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_israel.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :43.75   Min.   : 3.160   Min.   :13.85  
##  1st Qu.:56.08   1st Qu.: 3.770   1st Qu.:14.71  
##  Median :64.30   Median : 4.350   Median :15.85  
##  Mean   :63.19   Mean   : 6.649   Mean   :17.27  
##  3rd Qu.:71.51   3rd Qu.: 6.280   3rd Qu.:17.70  
##  Max.   :75.34   Max.   :19.530   Max.   :25.84
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Palaepaphos

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(dplyr); 
library(GGally); 
library(corrplot); 
library(tidyr); 
library(openxlsx); 
library(cluster); 
library(ggplot2); 
library(ggfortify); 
library(pca3d);
library(FactoMineR); # fast PCA graphs
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters
library(tidyverse) # rectangles around HClust

# Read data
pxrf <- read.xlsx("data/pxrf_PP.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Turn any "LOD":s and missing values to "NA":s to allow scaling
values <- c("MgO", "Al2O3", "SiO2", 
            "S", "Cl", "K2O", "CaO", "Ti", "V", "Cr", 
            "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As", 
            "Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", 
            "Rh", "Pd", "Ag", "Cd", "Sn", "Sb", 
            "La", "Ce", "Hf", "Ta", "W", "Pt", "Au", 
            "Hg", "Tl", "Pb", "Bi", "Th", "U")

pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)

# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(pxrf_NA, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"

# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)

# Removing columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)

# Removing columns if they contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)

# Datasets with only our new mudbrick samples (no soil or previously published ones)
MB_nona <- pxrf_no_na[c(1:11), ]
MB_all <- pxrf_all[c(1:11), ]

# Removing As, Nb, Rh, Ag and Pb as they have almost no viable measurement values
MB_all <- MB_all %>% 
        select(-As) %>% 
        select(-Nb) %>% 
        select(-Rh) %>%   
        select(-Ag) %>% 
        select(-Pb) 

pxrf_no_na:

summary(pxrf_no_na)
##             Area                Type          Ti               Mn         
##  old          :34   mudbrick      :11   Min.   :0.0969   Min.   :0.01190  
##  LA54:4       : 5   mudbrick (old):34   1st Qu.:0.1814   1st Qu.:0.03893  
##  LA59:2       : 5   soil          : 8   Median :0.2118   Median :0.04670  
##  Laona soil l1: 2                       Mean   :0.2180   Mean   :0.04549  
##  Laona soil l2: 2                       3rd Qu.:0.2594   3rd Qu.:0.05440  
##  Laona soil l3: 2                       Max.   :0.3571   Max.   :0.06800  
##  (Other)      : 3                                                         
##        Fe               Ni                 Cu                 Zr          
##  Min.   :0.9617   Min.   :0.000400   Min.   :0.003600   Min.   :0.004300  
##  1st Qu.:2.2241   1st Qu.:0.002200   1st Qu.:0.005933   1st Qu.:0.007800  
##  Median :2.7214   Median :0.003000   Median :0.006500   Median :0.008800  
##  Mean   :2.8091   Mean   :0.002856   Mean   :0.006761   Mean   :0.008810  
##  3rd Qu.:3.3530   3rd Qu.:0.003500   3rd Qu.:0.007400   3rd Qu.:0.009667  
##  Max.   :4.7754   Max.   :0.005900   Max.   :0.011333   Max.   :0.012200  
## 

pxrf_all:

summary(pxrf_all)
##             Area                Type         MgO            Al2O3      
##  old          :34   mudbrick      :11   Min.   :2.919   Min.   :2.182  
##  LA54:4       : 5   mudbrick (old):34   1st Qu.:3.369   1st Qu.:3.403  
##  LA59:2       : 5   soil          : 8   Median :4.088   Median :4.317  
##  Laona soil l1: 2                       Mean   :4.011   Mean   :4.168  
##  Laona soil l2: 2                       3rd Qu.:4.499   3rd Qu.:4.702  
##  Laona soil l3: 2                       Max.   :5.125   Max.   :5.902  
##  (Other)      : 3                       NA's   :34      NA's   :34     
##       SiO2              S                 Cl               K2O        
##  Min.   : 9.164   Min.   :0.04373   Min.   :0.01113   Min.   :0.1980  
##  1st Qu.:14.598   1st Qu.:0.10686   1st Qu.:0.02597   1st Qu.:0.3599  
##  Median :17.087   Median :0.21532   Median :0.05117   Median :0.4238  
##  Mean   :16.502   Mean   :0.25538   Mean   :0.13135   Mean   :0.4340  
##  3rd Qu.:18.560   3rd Qu.:0.32239   3rd Qu.:0.10892   3rd Qu.:0.4795  
##  Max.   :22.259   Max.   :0.74597   Max.   :0.93440   Max.   :0.6584  
##  NA's   :34       NA's   :35        NA's   :34        NA's   :36      
##       CaO              Ti               V                 Mn         
##  Min.   :11.28   Min.   :0.0969   Min.   :0.00267   Min.   :0.01190  
##  1st Qu.:14.53   1st Qu.:0.1814   1st Qu.:0.00334   1st Qu.:0.03893  
##  Median :18.05   Median :0.2118   Median :0.00420   Median :0.04670  
##  Mean   :17.94   Mean   :0.2180   Mean   :0.00425   Mean   :0.04549  
##  3rd Qu.:20.82   3rd Qu.:0.2594   3rd Qu.:0.00512   3rd Qu.:0.05440  
##  Max.   :25.59   Max.   :0.3571   Max.   :0.00587   Max.   :0.06800  
##  NA's   :34                       NA's   :41                         
##        Fe               Ni                 Cu                 Zn         
##  Min.   :0.9617   Min.   :0.000400   Min.   :0.003600   Min.   :0.00167  
##  1st Qu.:2.2241   1st Qu.:0.002200   1st Qu.:0.005933   1st Qu.:0.00308  
##  Median :2.7214   Median :0.003000   Median :0.006500   Median :0.00337  
##  Mean   :2.8091   Mean   :0.002856   Mean   :0.006761   Mean   :0.00335  
##  3rd Qu.:3.3530   3rd Qu.:0.003500   3rd Qu.:0.007400   3rd Qu.:0.00382  
##  Max.   :4.7754   Max.   :0.005900   Max.   :0.011333   Max.   :0.00500  
##                                                         NA's   :34       
##        As                Rb                Sr                Y          
##  Min.   :0.00033   Min.   :0.00120   Min.   :0.01790   Min.   :0.00087  
##  1st Qu.:0.00033   1st Qu.:0.00173   1st Qu.:0.02893   1st Qu.:0.00137  
##  Median :0.00040   Median :0.00203   Median :0.03403   Median :0.00160  
##  Mean   :0.00040   Mean   :0.00204   Mean   :0.03402   Mean   :0.00161  
##  3rd Qu.:0.00047   3rd Qu.:0.00213   3rd Qu.:0.03835   3rd Qu.:0.00173  
##  Max.   :0.00047   Max.   :0.00357   Max.   :0.04840   Max.   :0.00260  
##  NA's   :48        NA's   :35        NA's   :34        NA's   :36       
##        Zr                 Nb                Rh                Ag         
##  Min.   :0.004300   Min.   :0.00063   Min.   :0.01570   Min.   :0.00230  
##  1st Qu.:0.007800   1st Qu.:0.00063   1st Qu.:0.01683   1st Qu.:0.00282  
##  Median :0.008800   Median :0.00063   Median :0.01797   Median :0.00322  
##  Mean   :0.008810   Mean   :0.00063   Mean   :0.01797   Mean   :0.00330  
##  3rd Qu.:0.009667   3rd Qu.:0.00063   3rd Qu.:0.01910   3rd Qu.:0.00369  
##  Max.   :0.012200   Max.   :0.00063   Max.   :0.02023   Max.   :0.00447  
##                     NA's   :52        NA's   :51        NA's   :49       
##        Pb       
##  Min.   :0.002  
##  1st Qu.:0.002  
##  Median :0.002  
##  Mean   :0.002  
##  3rd Qu.:0.002  
##  Max.   :0.002  
##  NA's   :52

MB_nona:

summary(MB_nona)
##                Area               Type          Ti               Mn         
##  LA54:4          :5   mudbrick      :11   Min.   :0.0969   Min.   :0.01190  
##  LA59:2          :5   mudbrick (old): 0   1st Qu.:0.1663   1st Qu.:0.02397  
##  LA54:7          :1   soil          : 0   Median :0.1915   Median :0.03347  
##  Hadjuabudllah l1:0                       Mean   :0.1780   Mean   :0.03070  
##  Hadjuabudllah l2:0                       3rd Qu.:0.1941   3rd Qu.:0.03895  
##  Laona soil l1   :0                       Max.   :0.2295   Max.   :0.04177  
##  (Other)         :0                                                         
##        Fe               Ni                 Cu                 Zr          
##  Min.   :0.9617   Min.   :0.001400   Min.   :0.004500   Min.   :0.005400  
##  1st Qu.:1.6270   1st Qu.:0.002317   1st Qu.:0.005533   1st Qu.:0.007767  
##  Median :2.1312   Median :0.003000   Median :0.006700   Median :0.008733  
##  Mean   :1.8912   Mean   :0.002812   Mean   :0.007333   Mean   :0.008494  
##  3rd Qu.:2.2622   3rd Qu.:0.003367   3rd Qu.:0.008717   3rd Qu.:0.009100  
##  Max.   :2.4481   Max.   :0.003967   Max.   :0.011333   Max.   :0.011833  
## 

MB_all:

summary(MB_all)
##                Area               Type         MgO            Al2O3      
##  LA54:4          :5   mudbrick      :11   Min.   :2.919   Min.   :2.182  
##  LA59:2          :5   mudbrick (old): 0   1st Qu.:3.531   1st Qu.:3.403  
##  LA54:7          :1   soil          : 0   Median :4.088   Median :4.317  
##  Hadjuabudllah l1:0                       Mean   :3.999   Mean   :3.941  
##  Hadjuabudllah l2:0                       3rd Qu.:4.400   3rd Qu.:4.593  
##  Laona soil l1   :0                       Max.   :5.125   Max.   :5.270  
##  (Other)         :0                                                      
##       SiO2              S                Cl               K2O        
##  Min.   : 9.164   Min.   :0.1622   Min.   :0.02723   Min.   :0.1980  
##  1st Qu.:13.366   1st Qu.:0.2737   1st Qu.:0.06783   1st Qu.:0.3920  
##  Median :17.705   Median :0.3221   Median :0.09397   Median :0.4376  
##  Mean   :15.874   Mean   :0.3550   Mean   :0.20759   Mean   :0.4312  
##  3rd Qu.:18.560   3rd Qu.:0.3809   3rd Qu.:0.23782   3rd Qu.:0.4757  
##  Max.   :20.334   Max.   :0.7460   Max.   :0.93440   Max.   :0.6052  
##                                                      NA's   :1       
##       CaO              Ti               V                  Mn         
##  Min.   :11.28   Min.   :0.0969   Min.   :0.002667   Min.   :0.01190  
##  1st Qu.:18.46   1st Qu.:0.1663   1st Qu.:0.004317   1st Qu.:0.02397  
##  Median :20.74   Median :0.1915   Median :0.004750   Median :0.03347  
##  Mean   :19.67   Mean   :0.1780   Mean   :0.004721   Mean   :0.03070  
##  3rd Qu.:22.12   3rd Qu.:0.1941   3rd Qu.:0.005633   3rd Qu.:0.03895  
##  Max.   :25.59   Max.   :0.2295   Max.   :0.005867   Max.   :0.04177  
##                                   NA's   :3                           
##        Fe               Ni                 Cu                 Zn          
##  Min.   :0.9617   Min.   :0.001400   Min.   :0.004500   Min.   :0.001667  
##  1st Qu.:1.6270   1st Qu.:0.002317   1st Qu.:0.005533   1st Qu.:0.002467  
##  Median :2.1312   Median :0.003000   Median :0.006700   Median :0.003233  
##  Mean   :1.8912   Mean   :0.002812   Mean   :0.007333   Mean   :0.003170  
##  3rd Qu.:2.2622   3rd Qu.:0.003367   3rd Qu.:0.008717   3rd Qu.:0.003800  
##  Max.   :2.4481   Max.   :0.003967   Max.   :0.011333   Max.   :0.005000  
##                                                                           
##        Rb                 Sr                Y                   Zr          
##  Min.   :0.001200   Min.   :0.01790   Min.   :0.0008667   Min.   :0.005400  
##  1st Qu.:0.001617   1st Qu.:0.02893   1st Qu.:0.0013333   1st Qu.:0.007767  
##  Median :0.001767   Median :0.03507   Median :0.0014000   Median :0.008733  
##  Mean   :0.001750   Mean   :0.03438   Mean   :0.0013741   Mean   :0.008494  
##  3rd Qu.:0.001925   3rd Qu.:0.03835   3rd Qu.:0.0014667   3rd Qu.:0.009100  
##  Max.   :0.002133   Max.   :0.04840   Max.   :0.0017333   Max.   :0.011833  
##  NA's   :1                            NA's   :2

pXRF: K means cluster analysis

Only the “new” mudbrick samples

# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(MB_nona[3:8], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')

# K-means clustering in ggpairs: 2 clusters
km <-kmeans(MB_nona[3:8], centers = 2)
cluster_x <- as.factor(km$cluster)
ggpairs(MB_nona[3:8], mapping = aes(col = cluster_x))

pXRF: PCA

PCA with only the no-NA:s elements:

# PCA for only non-NA elements
colnames(MB_nona)
## [1] "Area" "Type" "Ti"   "Mn"   "Fe"   "Ni"   "Cu"   "Zr"
pca_1 <- prcomp(MB_nona[3:8], scale = TRUE, center = TRUE)
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6
## Standard deviation     2.0136 1.0789 0.70395 0.45806 0.25323 0.10779
## Proportion of Variance 0.6758 0.1940 0.08259 0.03497 0.01069 0.00194
## Cumulative Proportion  0.6758 0.8698 0.95241 0.98738 0.99806 1.00000
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))

autoplot(pca_1, data=MB_nona, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos 1: no NA:s at all")

PCA(MB_nona[3:8])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 6 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with all the feasible elements:

# NA:s converted to zeros
MB_all[is.na(MB_all)] <- 0

colnames(MB_all)
##  [1] "Area"  "Type"  "MgO"   "Al2O3" "SiO2"  "S"     "Cl"    "K2O"   "CaO"  
## [10] "Ti"    "V"     "Mn"    "Fe"    "Ni"    "Cu"    "Zn"    "Rb"    "Sr"   
## [19] "Y"     "Zr"
pca_2 <- prcomp((MB_all[3:20]), scale = TRUE, center = TRUE)
summary(pca_2)
## Importance of components:
##                           PC1    PC2     PC3     PC4    PC5     PC6    PC7
## Standard deviation     3.1620 1.8251 1.26077 1.10902 0.8950 0.63845 0.5612
## Proportion of Variance 0.5554 0.1851 0.08831 0.06833 0.0445 0.02265 0.0175
## Cumulative Proportion  0.5554 0.7405 0.82882 0.89715 0.9416 0.96429 0.9818
##                           PC8     PC9    PC10      PC11
## Standard deviation     0.4388 0.34174 0.13583 3.155e-16
## Proportion of Variance 0.0107 0.00649 0.00103 0.000e+00
## Cumulative Proportion  0.9925 0.99897 1.00000 1.000e+00
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))

autoplot(pca_2, data=MB_all, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos 2: NA:s permitted")

PCA(MB_all[3:20])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 11 individuals, described by 18 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

PCA with all the Mudbrick and soil samples

Including the previously published ones, No NA values permitted

# PCA for only non-NA elements
colnames(pxrf_no_na)
## [1] "Area" "Type" "Ti"   "Mn"   "Fe"   "Ni"   "Cu"   "Zr"
pca_3 <- prcomp(na.omit(pxrf_no_na[3:6]), scale = TRUE, center = TRUE)
summary(pca_3)
## Importance of components:
##                           PC1    PC2     PC3     PC4
## Standard deviation     1.6514 0.9995 0.41614 0.31724
## Proportion of Variance 0.6818 0.2498 0.04329 0.02516
## Cumulative Proportion  0.6818 0.9315 0.97484 1.00000
biplot(pca_3, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))

autoplot(pca_3, data=pxrf_no_na, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

autoplot(pca_3, data=pxrf_no_na, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by sample type")

PCA with all the Mudbrick samples

Including the previously published ones, NA values are permitted This is not viable, as the difference in what values are available is too different between old and new samples.

# NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0

colnames(pxrf_all)
##  [1] "Area"  "Type"  "MgO"   "Al2O3" "SiO2"  "S"     "Cl"    "K2O"   "CaO"  
## [10] "Ti"    "V"     "Mn"    "Fe"    "Ni"    "Cu"    "Zn"    "As"    "Rb"   
## [19] "Sr"    "Y"     "Zr"    "Nb"    "Rh"    "Ag"    "Pb"
pca_4 <- prcomp(pxrf_all[3:25], scale = TRUE, center = TRUE)
summary(pca_4)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     3.3748 1.8765 1.50629 1.13013 1.04455 0.89816 0.83735
## Proportion of Variance 0.4952 0.1531 0.09865 0.05553 0.04744 0.03507 0.03049
## Cumulative Proportion  0.4952 0.6483 0.74692 0.80245 0.84989 0.88497 0.91545
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.77794 0.66209 0.53427 0.40495 0.35837 0.33354 0.27679
## Proportion of Variance 0.02631 0.01906 0.01241 0.00713 0.00558 0.00484 0.00333
## Cumulative Proportion  0.94176 0.96082 0.97323 0.98036 0.98595 0.99078 0.99412
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     0.22788 0.17584 0.14605 0.12032 0.10296 0.06388 0.04325
## Proportion of Variance 0.00226 0.00134 0.00093 0.00063 0.00046 0.00018 0.00008
## Cumulative Proportion  0.99637 0.99772 0.99865 0.99927 0.99974 0.99991 0.99999
##                           PC22      PC23
## Standard deviation     0.01157 1.468e-16
## Proportion of Variance 0.00001 0.000e+00
## Cumulative Proportion  1.00000 1.000e+00
biplot(pca_4, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"))

autoplot(pca_4, data=pxrf_all, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by area")

autoplot(pca_4, data=pxrf_all, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Palaepaphos grouped by sample type")

pXRF: HCA

HCA including only new mudbrick samples:

# HCA dendrogam 1
dend <- 
    MB_all[3:20] %>%           # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=5) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=5) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2
HCA.ward5 <- hclust(dist(MB_all[3:20], method="euclid"), method="ward.D2")
plot(HCA.ward5, main="Ward's method")
rect.hclust(HCA.ward5, k=5)

# Divisive hierarchical clustering (DIANA) just for comparison
hc4 <- diana(MB_all[3:20])
pltree(hc4, cex = 1, hang = -1, main = "Dendrogram of diana")

# Divise coefficient; amount of clustering structure found
hc4$dc
## [1] 0.7894269

HCA including all mudbrick samples, only no NA:s elements :

# HCA dendrogam 1
dend <- 
    pxrf_no_na[1:45, 3:8] %>%           # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=5) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=5) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2
HCA.ward5 <- hclust(dist(pxrf_no_na[1:45, 3:8], method="euclid"), method="ward.D2")
plot(HCA.ward5, main="Ward's method")
rect.hclust(HCA.ward5, k=5)

# Divisive hierarchical clustering (DIANA) just for comparison
hc4 <- diana(pxrf_no_na[3:8])
pltree(hc4, cex = 1, hang = -1, main = "Dendrogram of diana")

# Divise coefficient; amount of clustering structure found
hc4$dc
## [1] 0.9804178

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_PP.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Creating subsets for only mudbricks
MB_grain <- grain_01[c(1:11),]
MB_context <- grain[c(1:11),]

# Ternary plots
ggtern(data=MB_grain, aes(x=Clay, y=Sand, z=Silt, color=MB_context$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(MB_grain)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_PP.xlsx", sep=";")
tga <- read.xlsx("data/tga_PP.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet;weight       sample;wet   
##  Length:27          Min.   :8.100   Min.   : 9.637   Min.   :1.537  
##  Class :character   1st Qu.:8.639   1st Qu.:10.859   1st Qu.:2.193  
##  Mode  :character   Median :8.997   Median :11.184   Median :2.371  
##                     Mean   :8.988   Mean   :11.274   Mean   :2.286  
##                     3rd Qu.:9.454   3rd Qu.:11.844   3rd Qu.:2.453  
##                     Max.   :9.560   Max.   :12.301   Max.   :2.785  
##    dry_weight      after_550_C      after_950_C      context         
##  Min.   : 9.595   Min.   : 9.484   Min.   : 9.23   Length:27         
##  1st Qu.:10.813   1st Qu.:10.721   1st Qu.:10.27   Class :character  
##  Median :11.132   Median :11.019   Median :10.59   Mode  :character  
##  Mean   :11.216   Mean   :11.113   Mean   :10.67                     
##  3rd Qu.:11.777   3rd Qu.:11.681   3rd Qu.:11.18                     
##  Max.   :12.218   Max.   :12.076   Max.   :11.56                     
##      type          
##  Length:27         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:30          Length:30          Length:30          Length:30         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:30          Length:30          Length:30          Length:30         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:30         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1] 14.55842 18.28992 20.42575 21.68496 25.15246
## 
## $n
## [1] 27
## 
## $conf
## [1] 19.39341 21.45808
## 
## $out
## [1] 33.21076 33.78112
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
##  [1] "PP-1"   "PP-2"   "PP-3"   "PP-4"   "PP-5"   "PP-6"   "PP-7"   "PP-8"  
##  [9] "PP-9"   "PP-10"  "PP-11"  "PP-12"  "PP-13"  "PP-14"  "PP-15"  "PP-16" 
## [17] "PP-17"  "PP-18"  "PP-19"  "PP-1_2" "PP-2_2" "PP-3_2" "PP-4_2" "PP-5_2"
## [25] "PP-6_2" "PP-7_2" "PP-8_2"
loi <- subset(loi[1:19, ])

ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(type))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(loi$c550)

barplot(loi$c950)

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1] 14.54839 18.93682 20.82648 22.49982 24.52314
## 
## $n
## [1] 28
## 
## $conf
## [1] 19.76259 21.89036
## 
## $out
## [1] 31.05334 33.79251
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_PP.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :62.38   Min.   : 7.27   Min.   :19.90  
##  1st Qu.:63.47   1st Qu.: 8.50   1st Qu.:21.36  
##  Median :64.19   Median : 9.34   Median :21.72  
##  Mean   :65.50   Mean   : 9.29   Mean   :21.92  
##  3rd Qu.:65.78   3rd Qu.:10.56   3rd Qu.:22.96  
##  Max.   :72.00   Max.   :10.90   Max.   :23.90
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Artaxata

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);

library(GGally); 
library(corrplot); 
library(tidyr); 
library(cluster); 
library(ggplot2); 
library(ggrepel);
library(ggfortify); 
library(pca3d);
library(ggdendro); # dendrograms together with ggplot
library(dendextend); # dendrograms - change parameters

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AA.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Turn any "LOD":s and missing values to "NA":s to allow scaling
# Removing "Na", S", "Cl", "P" and "Ba" already
values <- c("MgO", "Al2O3", "SiO2", 
            "K2O", "CaO", "Ti", "V", "Cr", 
            "Mn", "Fe", "Co", "Ni", "Cu", "Zn", "As", 
            "Se", "Rb", "Sr", "Y", "Zr", "Nb", "Mo", 
            "Rh", "Pd", "Ag", "Cd", "Sn", "Sb", 
            "La", "Ce", "Hf", "Ta", "W", "Pt", "Au", 
            "Hg", "Tl", "Pb", "Bi", "Th", "U")

pxrf_NA <- select(pxrf, one_of(values))
pxrf_NA <- pxrf_NA %>% mutate_if(is.character,as.numeric)

# Scaling the data with standard z-score method
scaled_pxrf <- as.data.frame(scale(pxrf_NA))

# Average data by "Sample", "Area" and "Type" columns from the original dataset
average_pxrf <- aggregate(scaled_pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Assign sample names as row names, and rename the other newly created columns back to "Type" and "Area" for clarity
rownames(average_pxrf) <- average_pxrf$Group.1
average_pxrf <- average_pxrf %>% select(-Group.1)
names(average_pxrf)[names(average_pxrf) == "Group.2"] <- "Area"
names(average_pxrf)[names(average_pxrf) == "Group.3"] <- "Type"

# Change "Type" and "Area" to factors
average_pxrf$Type <- factor(average_pxrf$Type)
average_pxrf$Area <- factor(average_pxrf$Area)

# Removing columns that only have NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- average_pxrf %>% select_if(all_na)

# Removing columns if they contain any NA values at all
any_na <- function(x) all(!is.na(x))
pxrf_no_na <- average_pxrf %>% select_if(any_na)

pxrf_no_na:

summary(pxrf_no_na)
##               Area         Type         MgO              Al2O3         
##  Hill II        :3   mudbrick:10   Min.   :-0.7957   Min.   :-1.47906  
##  Phase I        :2                 1st Qu.:-0.4773   1st Qu.:-0.41015  
##  Phase II       :2                 Median :-0.2741   Median : 0.05631  
##  Phase II or III:2                 Mean   : 0.0000   Mean   : 0.00000  
##  Urartian silo  :1                 3rd Qu.: 0.3559   3rd Qu.: 0.45276  
##                                    Max.   : 1.9131   Max.   : 1.38944  
##       SiO2              K2O                CaO                   Ti          
##  Min.   :-1.6654   Min.   :-1.55669   Min.   :-1.4333259   Min.   :-1.87044  
##  1st Qu.:-0.3827   1st Qu.:-0.68787   1st Qu.:-0.6680205   1st Qu.:-0.31750  
##  Median : 0.3100   Median : 0.03848   Median :-0.0001026   Median : 0.02437  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000000   Mean   : 0.00000  
##  3rd Qu.: 0.6156   3rd Qu.: 0.75515   3rd Qu.: 0.7641718   3rd Qu.: 0.81009  
##  Max.   : 0.8556   Max.   : 1.11418   Max.   : 1.2091486   Max.   : 0.86883  
##        Mn                 Fe                 Ni                 Cu         
##  Min.   :-1.03797   Min.   :-0.73814   Min.   :-0.94570   Min.   :-1.2582  
##  1st Qu.:-0.30897   1st Qu.:-0.28874   1st Qu.:-0.66424   1st Qu.:-0.5607  
##  Median :-0.09355   Median :-0.08696   Median :-0.07318   Median : 0.1094  
##  Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.: 0.21238   3rd Qu.: 0.08823   3rd Qu.: 0.45456   3rd Qu.: 0.3966  
##  Max.   : 1.57654   Max.   : 1.60860   Max.   : 1.50299   Max.   : 1.5865  
##        Zn                Rb                Sr                Y          
##  Min.   :-0.8847   Min.   :-1.2036   Min.   :-1.4059   Min.   :-1.0149  
##  1st Qu.:-0.3508   1st Qu.:-0.4637   1st Qu.:-0.3803   1st Qu.:-0.3542  
##  Median :-0.0839   Median :-0.2006   Median :-0.1555   Median :-0.1163  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.2720   3rd Qu.: 0.3256   3rd Qu.: 0.4915   3rd Qu.: 0.1216  
##  Max.   : 1.5813   Max.   : 1.4601   Max.   : 1.4314   Max.   : 1.7338  
##        Zr         
##  Min.   :-1.1158  
##  1st Qu.:-0.5166  
##  Median :-0.3001  
##  Mean   : 0.0000  
##  3rd Qu.: 0.1279  
##  Max.   : 1.9054

pxrf_all:

summary(pxrf_all)
##               Area         Type         MgO              Al2O3         
##  Hill II        :3   mudbrick:10   Min.   :-0.7957   Min.   :-1.47906  
##  Phase I        :2                 1st Qu.:-0.4773   1st Qu.:-0.41015  
##  Phase II       :2                 Median :-0.2741   Median : 0.05631  
##  Phase II or III:2                 Mean   : 0.0000   Mean   : 0.00000  
##  Urartian silo  :1                 3rd Qu.: 0.3559   3rd Qu.: 0.45276  
##                                    Max.   : 1.9131   Max.   : 1.38944  
##                                                                        
##       SiO2              K2O                CaO                   Ti          
##  Min.   :-1.6654   Min.   :-1.55669   Min.   :-1.4333259   Min.   :-1.87044  
##  1st Qu.:-0.3827   1st Qu.:-0.68787   1st Qu.:-0.6680205   1st Qu.:-0.31750  
##  Median : 0.3100   Median : 0.03848   Median :-0.0001026   Median : 0.02437  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.0000000   Mean   : 0.00000  
##  3rd Qu.: 0.6156   3rd Qu.: 0.75515   3rd Qu.: 0.7641718   3rd Qu.: 0.81009  
##  Max.   : 0.8556   Max.   : 1.11418   Max.   : 1.2091486   Max.   : 0.86883  
##                                                                              
##        V                  Cr                  Mn                 Fe          
##  Min.   :-0.46003   Min.   :-0.686179   Min.   :-1.03797   Min.   :-0.73814  
##  1st Qu.:-0.43566   1st Qu.:-0.459188   1st Qu.:-0.30897   1st Qu.:-0.28874  
##  Median :-0.19193   Median :-0.232198   Median :-0.09355   Median :-0.08696  
##  Mean   :-0.09932   Mean   :-0.232198   Mean   : 0.00000   Mean   : 0.00000  
##  3rd Qu.: 0.02742   3rd Qu.:-0.005208   3rd Qu.: 0.21238   3rd Qu.: 0.08823  
##  Max.   : 0.56361   Max.   : 0.221783   Max.   : 1.57654   Max.   : 1.60860  
##  NA's   :5          NA's   :8                                                
##        Ni                 Cu                Zn                As          
##  Min.   :-0.94570   Min.   :-1.2582   Min.   :-0.8847   Min.   :-1.03932  
##  1st Qu.:-0.66424   1st Qu.:-0.5607   1st Qu.:-0.3508   1st Qu.:-0.21581  
##  Median :-0.07318   Median : 0.1094   Median :-0.0839   Median :-0.05111  
##  Mean   : 0.00000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.05869  
##  3rd Qu.: 0.45456   3rd Qu.: 0.3966   3rd Qu.: 0.2720   3rd Qu.: 0.44299  
##  Max.   : 1.50299   Max.   : 1.5865   Max.   : 1.5813   Max.   : 1.10179  
##                                                         NA's   :1         
##        Rb                Sr                Y                 Zr         
##  Min.   :-1.2036   Min.   :-1.4059   Min.   :-1.0149   Min.   :-1.1158  
##  1st Qu.:-0.4637   1st Qu.:-0.3803   1st Qu.:-0.3542   1st Qu.:-0.5166  
##  Median :-0.2006   Median :-0.1555   Median :-0.1163   Median :-0.3001  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.3256   3rd Qu.: 0.4915   3rd Qu.: 0.1216   3rd Qu.: 0.1279  
##  Max.   : 1.4601   Max.   : 1.4314   Max.   : 1.7338   Max.   : 1.9054  
##                                                                         
##        Nb          
##  Min.   :-0.65796  
##  1st Qu.:-0.50862  
##  Median :-0.06060  
##  Mean   :-0.09379  
##  3rd Qu.:-0.06060  
##  Max.   : 0.93500  
##  NA's   :4

pXRF: K means cluster analysis

# Drop the more dubious elements
pxrf_1 <- pxrf_no_na %>% select(-Al2O3)
pxrf_1 <- pxrf_1 %>% select(-CaO)

# Optimal number of clusters in K-means analysis: 2
k_max <- 8
twcss <- sapply(1:k_max, function(k){kmeans(pxrf_1[3:12], k)$tot.withinss})
qplot(x = 1:k_max, y = twcss, geom = 'line')

# K-means clustering in ggpairs: 2 clusters
km <-kmeans(pxrf_1[3:12], centers = 2)
cluster_x <- as.factor(km$cluster)
ggpairs(pxrf_1, mapping = aes(col = cluster_x))

pXRF: PCA

PCA with only the no-NA:s elements:

# PCA
pca_1 <- prcomp(pxrf_1[3:15])
summary(pca_1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.4147 1.0889 0.9306 0.48309 0.37987 0.31662 0.24466
## Proportion of Variance 0.6901 0.1403 0.1025 0.02762 0.01708 0.01186 0.00708
## Cumulative Proportion  0.6901 0.8305 0.9330 0.96059 0.97767 0.98953 0.99662
##                            PC8     PC9      PC10
## Standard deviation     0.15630 0.06455 7.266e-17
## Proportion of Variance 0.00289 0.00049 0.000e+00
## Cumulative Proportion  0.99951 1.00000 1.000e+00
# PCA1 plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA limited elements")

autoplot(pca_1, data=pxrf_1, colour='Area', shape = FALSE, label = TRUE,  main = "PCA grouped by area")

PCA with all the feasible elements:

# PCA with all elements, NA:s converted to zeros
pxrf_all[is.na(pxrf_all)] <- 0

pca_2 <- prcomp(pxrf_all[3:21])
summary(pca_2)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.6421 1.2026 0.98838 0.84185 0.45012 0.39002 0.30457
## Proportion of Variance 0.6529 0.1353 0.09136 0.06628 0.01895 0.01423 0.00868
## Cumulative Proportion  0.6529 0.7881 0.87946 0.94574 0.96469 0.97891 0.98759
##                            PC8     PC9      PC10
## Standard deviation     0.28180 0.23084 2.238e-16
## Proportion of Variance 0.00743 0.00498 0.000e+00
## Cumulative Proportion  0.99502 1.00000 1.000e+00
# PCA2 plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA more elements")

autoplot(pca_2, data=pxrf_all, colour='Area', shape = FALSE, label = TRUE,  main = "PCA more elements grouped by area")

pXRF: HCA

# HCA

# HCA dendrogam 1
dend <- 
    pxrf_1[3:12] %>%                    # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    set("branches_k_color", k=4) %>% 
    set("branches_lwd", 1.2) %>%
    set("labels_col", c("red", "black", "blue"), k=4) %>%    
    set("labels_cex", 1) %>% 
    set("leaves_pch", NA) 
# plot the dend in usual "base" plotting engine:
plot(dend)

# HCA dendrogam 2: 
HCA.ward4 <- hclust(dist(pxrf_1[3:12], method="euclid"), method="ward.D2")
plot(HCA.ward4, main="HCA with clusters")
rect.hclust(HCA.ward4, k=4)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern)

# Read and filter data
grain <- read.xlsx("data/grain_AA.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
grain <- grain %>% select(-Group.1)
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[4:6], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(tidyr); 

loi <- read.xlsx("data/loi_AA.xlsx", sep=";")
tga <- read.xlsx("data/tga_AA.xlsx", sep=";")

summary(loi)
##     sample             crucible       wet_weight      sample_wet   
##  Length:10          Min.   :8.066   Min.   :10.70   Min.   :1.923  
##  Class :character   1st Qu.:8.461   1st Qu.:10.93   1st Qu.:2.326  
##  Mode  :character   Median :8.995   Median :11.48   Median :2.558  
##                     Mean   :8.892   Mean   :11.39   Mean   :2.496  
##                     3rd Qu.:9.174   3rd Qu.:11.75   3rd Qu.:2.701  
##                     Max.   :9.890   Max.   :12.18   Max.   :2.842  
##    dry_weight     after_550_C     after_950_C      context         
##  Min.   :10.58   Min.   :10.47   Min.   :10.31   Length:10         
##  1st Qu.:10.85   1st Qu.:10.78   1st Qu.:10.60   Class :character  
##  Median :11.43   Median :11.33   Median :11.16   Mode  :character  
##  Mean   :11.32   Mean   :11.22   Mean   :11.07                     
##  3rd Qu.:11.69   3rd Qu.:11.58   3rd Qu.:11.42                     
##  Max.   :12.13   Max.   :12.06   Max.   :11.94                     
##      type          
##  Length:10         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
summary(tga)
##      Name           Analysis;Date         Batch               Wet           
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      Dry              Mass_550           Mass_950           context         
##  Length:12          Length:12          Length:12          Length:12         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      type          
##  Length:12         
##  Class :character  
##  Mode  :character
# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (sample mass after drying)
# "c550" (sample mass after 550 C)
# "c950" (sample mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

Manual LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=loi)

boxplot(c950~context, data=loi)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(loi$c950)
## $stats
## [1] 5.507166 5.756990 6.617026 7.738517 7.965070
## 
## $n
## [1] 10
## 
## $conf
## [1] 5.626976 7.607076
## 
## $out
## numeric(0)
# The two high carbonate outliers are both AY-53 (two different measurements to make sure the first batch wasn't misleading)

# Removing duplicates from the next graph
rownames(loi)
##  [1] "AA-1"  "AA-2"  "AA-3"  "AA-4"  "AA-5"  "AA-6"  "AA-7"  "AA-8"  "AA-9" 
## [10] "AA-10"
loi <- subset(loi[1:47, ])

### ggplots later with context and type

barplot(loi$c550)

barplot(loi$c950)

# Stacked barplot (x = reorder(sample, LOI))
loi_long = gather(loi, key = var, value = value, c550, c950)

ggplot(loi_long, aes(x = reorder(sample, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

TGA LOI results

# Distribution of LOI in 550 C (1) and 950 C (2)
boxplot(c550~context, data=tga)

boxplot(c950~context, data=tga)

# boxplot.stats gives us the lower whisker or minimum, the interquartile range of the middle (the second and fourth values), the median (third value) and the upper whisker or maximum. 
boxplot.stats(tga$c950)
## $stats
## [1] 5.611934 6.118481 6.532557 7.126867 8.599873
## 
## $n
## [1] 11
## 
## $conf
## [1] 6.052174 7.012940
## 
## $out
## [1] 4.097341
# Removing duplicates from the next graph
#rownames(tga)
#loi <- subset(loi[1:47, ])

ggplot(tga, 
      aes(c550, c950, label = rownames(tga))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + labs(title = "Organic vs. carbonate content") +
      xlab('Organic LOI (%)') +
      ylab('Carbonate LOI (%)') +
      theme(axis.title = element_text())

barplot(tga$c550)

barplot(tga$c950)

# Stacked barplot (x = reorder(sample, LOI))
tga_long = gather(tga, key = var, value = value, c550, c950)

ggplot(tga_long, aes(x = reorder(Name, value), y = value, fill = var)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AA.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :51.37   Min.   :3.880   Min.   :15.92  
##  1st Qu.:57.73   1st Qu.:4.402   1st Qu.:16.49  
##  Median :58.79   Median :4.555   Median :16.93  
##  Mean   :58.18   Mean   :5.067   Mean   :17.32  
##  3rd Qu.:59.58   3rd Qu.:4.695   3rd Qu.:17.32  
##  Max.   :61.57   Max.   :8.670   Max.   :21.47
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)